library(peprDnaStability)
library(dplyr)
## 
## Attaching package: 'dplyr'
## 
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## 
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(tidyr)
library(ggplot2)
library(knitr)
gel_data_dir <- "stability_example"
gel_metadata <- "stability_example/MG001_stability_metadata.csv"

## number of gel lanes ran
number_of_lanes <- 12

Stability Study Experimental Design

gel_design <- read.csv(gel_metadata) %>% 
                mutate(gel = sub(pattern = ".*_Gel_0",replacement =  "Gel-", x = gel),
                       gel = sub(pattern = ".jpg",replacement =  "", x = gel))
ggplot(gel_design) + geom_raster(aes(x = gel, y = lane, fill = condition), alpha = 0.5) +
    geom_text(aes(x = gel, y = lane, label = vial)) + facet_wrap(~gel,nrow = 1, scales = "free_x") + theme_bw() + theme(axis.text.x = element_blank(), axis.title.x = element_blank(), axis.ticks.x = element_blank(), legend.position = "bottom", legend.direction = "horizontal") + labs(y = "Lane", fill = "Conditions")

Stability experimental design, Text values indicate the vial box and position source within the material lot.

Raw Image processing

Images were croped using preview. Below are the quality control plots for processing the raw image files.

Image Processing QC Figures

Rdat_files <- list.files(path = gel_data_dir,pattern = "Rdata",full.names = TRUE)
lane_labels <- paste0("L",c(paste0("0",1:9),1:12))[1:number_of_lanes]

for(gel_dat in Rdat_files){
    dat_list <- process_gel(gel_dat)
    print(gel_dat)
    annotated_gel(image_mat = dat_list['image_dat'], 
                        lane_edges_df = dat_list[5][[1]]$lane_edges_df , 
                        peaks_df = dat_list['marker_dat'], 
                        lane_labels = lane_labels)
}
## Joining by: c("bins", "y")
## [1] "stability_example/MG001-37C-S-1.Rdata"
## Joining by: c("bins", "y")

## [1] "stability_example/MG001-37C-S-2.Rdata"
## Joining by: c("bins", "y")

## [1] "stability_example/MG001-37C-S-3.Rdata"
## Joining by: c("bins", "y")

## [1] "stability_example/MG001-4C-S-1.Rdata"
## Joining by: c("bins", "y")

## [1] "stability_example/MG001-4C-S-2.Rdata"

Statistical Analysis of Stability Results

batch_data <- batch_process_gels(gel_data_dir)
## Joining by: c("bins", "y")
## Joining by: c("bins", "y")
## Joining by: c("bins", "y")
## Joining by: c("bins", "y")
## Joining by: c("bins", "y")
## combine data frames
intensity_df <- dplyr::data_frame()
markers_df <- dplyr::data_frame()
binned_df <- dplyr::data_frame()
for(x in batch_data){
    gel_name <- x$gel
    intensity_df <- x$intensity_dat %>%
        dplyr::mutate(gel = gel_name) %>%
        dplyr::bind_rows(intensity_df)
    markers_df <- x$marker_dat %>%
        dplyr::mutate(gel = gel_name) %>%
        dplyr::bind_rows(markers_df)
    binned_df <- x$bin_dat %>%
        dplyr::mutate(gel = gel_name) %>%
        dplyr::bind_rows(binned_df)
}

## adding metadata
meta <- read.csv(gel_metadata,stringsAsFactors = F) %>% filter(condition != "Blank") %>% 
            mutate(lane_labels = lane, lane = gsub("L0","L",lane))
intensity_df$lane <- as.character(intensity_df$lane)
intensity <- left_join(intensity_df, meta) %>% filter(condition != "Blank")
## Joining by: c("lane", "gel")
markers <- left_join(markers_df, meta) %>% filter(condition != "Blank")
## Joining by: c("lane", "gel")
binned_df$lane <- as.character(binned_df$lane)
binned <- left_join(binned_df, meta) %>% filter(condition != "Blank")
## Joining by: c("lane", "gel")
highlow_bin <- binned %>%
    filter(!is.na(vial), bins %in% c("23.1-9.42", "48.5-23.1",
                                     "97-48.5","194-97")) %>%
    mutate(bins = ifelse(bins %in% c("23.1-9.42", "48.5-23.1"), 
                         "low","high")) %>% 
    group_by(lane, bins, gel, vial, condition) %>% 
    summarise(bin_intensity = sum(bin_intensity))

highlow_ratio <- highlow_bin %>% 
    spread(bins, bin_intensity) %>% 
    mutate(bin_ratio = high/(low + high))

highlow_ratio_control<- highlow_ratio %>% filter(condition == "Control")
control_ratio <- highlow_ratio_control %>% 
                    group_by(gel) %>% 
                    summarise(control_ratio = mean(bin_ratio))
highlow_diff <- highlow_ratio %>% 
                    left_join(control_ratio) %>% 
                    mutate(ratio_diff = bin_ratio - control_ratio) %>% 
                    filter(condition != "Control" & condition != "Ladder") %>% 
                    mutate(gel = sub(pattern = ".*Gel_0",replacement = "", x = gel),
                           gel = sub(".jpg","", gel))
## Joining by: "gel"
ggplot(highlow_diff) + geom_point(aes(x = condition, y = ratio_diff, color = gel)) + 
    labs(x = "Treatment", y = "Ratio Difference from Control") + 
    theme_bw()

Stability study high and low molecular weight regions.

make_one_sided_ttest_df <- function (dat, value, gelAsFactor = TRUE) {
    tt_df <- data_frame()
    ## Pairwise comparison to control for each bin and each condition
    for(bin in unique(dat$bins)){
        df <- dat %>% filter(bins == bin)
        y <- df  %>% filter(condition == "Control")  %>% .[[value]]
        for(treatment in unique(df$condition[df$condition != "Control"])){
            x <-df  %>% filter(condition == treatment)  %>% .[[value]]
            tt <- t.test(x,y,alternative = "less")
            tt_df <- data_frame(size_bins = bin,
                                condition = treatment,
                                tstatistic = tt$statistic,
                                pvalue = tt$p.value) %>% 
                bind_rows(tt_df)
        }
    }
    tt_df %>% mutate(sig = ifelse(pvalue < 0.05/n(),TRUE,FALSE))
}
tt_df <- highlow_ratio %>% mutate(bins = "H") %>% 
            filter(condition != "Ladder") %>% 
            make_one_sided_ttest_df(value = "bin_ratio", gelAsFactor = FALSE)
tt_table <- tt_df %>% mutate(pvalue = round(pvalue, 4) %>% as.character(),
                             pvalue = ifelse(sig == TRUE, paste0("__",pvalue,"__"),pvalue),
                             pvalue = ifelse(pvalue == "__0__", "__<0.0000__", pvalue),
                             tstatistic = round(tstatistic, 2)) %>% 
            select(-sig) %>% 
            spread(size_bins, pvalue)
colnames(tt_table) <- c("Treatment", "T-Statistic", "p-value") 

kable(tt_table, row.names = FALSE, caption = "P-value of one sided t-tests comparing proprotion of DNA in high and low molecular weight bins between treatment and control. Significant p-values (alpha = 0.0125) are indicated in bold.")
P-value of one sided t-tests comparing proprotion of DNA in high and low molecular weight bins between treatment and control. Significant p-values (alpha = 0.0125) are indicated in bold.
Treatment T-Statistic p-value
2w37c -0.99 0.1668
2w4c 0.58 0.7162
8w37c -6.87 <0.0000
8w4c 0.57 0.7112
sessionInfo()
## R version 3.2.1 (2015-06-18)
## Platform: x86_64-apple-darwin13.4.0 (64-bit)
## Running under: OS X 10.9.5 (Mavericks)
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
## [1] knitr_1.10.5                ggplot2_1.0.1              
## [3] tidyr_0.2.0                 dplyr_0.4.2                
## [5] peprDnaStability_0.0.0.9000
## 
## loaded via a namespace (and not attached):
##  [1] Rcpp_0.12.0      magrittr_1.5     MASS_7.3-40      munsell_0.4.2   
##  [5] colorspace_1.2-6 R6_2.1.0         jpeg_0.1-8       highr_0.5       
##  [9] stringr_1.0.0    plyr_1.8.3       tools_3.2.1      parallel_3.2.1  
## [13] grid_3.2.1       gtable_0.1.2     DBI_0.3.1        htmltools_0.2.6 
## [17] lazyeval_0.1.10  yaml_2.1.13      assertthat_0.1   digest_0.6.8    
## [21] reshape2_1.4.1   formatR_1.2      evaluate_0.7     rmarkdown_0.7   
## [25] labeling_0.3     stringi_0.5-5    scales_0.2.5     proto_0.3-10